import scrapy
import time
from novel.items import ComicBookItem
from novel.items import ComicChapterItem
from novel.items import ComicContentItem


class CiweimaoComicSpider(scrapy.Spider):
    """Spider that crawls comics starting from the ciweimao.com comic index.

    Crawl flow:
        1. ``parse``         - index page, yields one request per comic link.
        2. ``parse_comic``   - comic page, yields a ``ComicBookItem``, one
           ``ComicChapterItem`` per chapter title, and one request per
           chapter link.
        3. ``parse_content`` - chapter page, yields one ``ComicContentItem``
           per image found on the page.
    """

    name = 'ciweimao_comic'
    allowed_domains = ['ciweimao.com']
    start_urls = ['http://ciweimao.com/index-comic']

    @staticmethod
    def _id_from_url(url):
        """Return the last path segment of ``url`` (query/fragment removed).

        Taking only the final character of the URL truncates any id that is
        longer than one character, so the whole trailing segment is used.
        """
        path = url.split('#', 1)[0].split('?', 1)[0]
        return path.rstrip('/').rsplit('/', 1)[-1]

    # Collect the comic URLs from the index page.
    def parse(self, response):
        """Yield a request for every comic linked from the index page."""
        comic_urls = response.xpath('//*[@class="mod-body"]//li/a/@href').getall()
        self.logger.debug('Found %d comic urls on %s', len(comic_urls), response.url)
        for comic_url in comic_urls:
            # urljoin makes relative hrefs absolute; Request rejects URLs
            # without a scheme.
            yield scrapy.Request(url=response.urljoin(comic_url), callback=self.parse_comic)

    # Parse the comic summary and its chapter list.
    def parse_comic(self, response):
        """Yield the comic's book item, its chapter items and chapter requests."""
        comic_id = self._id_from_url(response.url)
        intro_xpath = '//*[@class="book-info"]//div[@class="book-intro"]'

        item = ComicBookItem()
        item['comic_id'] = comic_id
        item['comic_title'] = response.xpath('//*[@class="book-info"]/h3/text()').get()
        item['comic_author'] = response.xpath('//*[@class="book-info"]/h3//a/text()').get()
        # The first three characters of the state text are dropped
        # (presumably a label prefix - confirm against the page markup).
        # Guard against a missing node so one odd page does not abort parsing.
        state = response.xpath(intro_xpath + '//li[2]/text()').get()
        item['comic_state'] = state[3:] if state else state
        item['comic_type'] = response.xpath(intro_xpath + '//li[4]/text()').get()
        item['comic_intro'] = response.xpath('//*[@class="book-info"]//div[@class="book-desc"]/text()').get()
        item['comic_cover'] = response.xpath('//img[@alt]/@src').get()
        yield item

        chapter_titles = response.xpath('//ul[@class="book-chapter-list"]//a/text()').getall()
        for chapter_title in chapter_titles:
            chap_item = ComicChapterItem()
            chap_item['comic_chap_title'] = chapter_title
            chap_item['comic_title'] = item['comic_title']
            yield chap_item

        content_urls = response.xpath('//ul[@class="book-chapter-list"]//a/@href').getall()
        for content_url in content_urls:
            # Hand the comic id to the chapter callback: the chapter URL
            # identifies the chapter, not the comic it belongs to.
            yield scrapy.Request(
                url=response.urljoin(content_url),
                callback=self.parse_content,
                meta={'comic_id': comic_id},
            )

    # Parse the comic content (page images) of one chapter.
    def parse_content(self, response):
        """Yield one content item per image on the chapter page, numbered from 1."""
        imgs_src = response.xpath('//img/@src').getall()
        chap_title = response.xpath('//*[@class="comic-read-page"]//span/text()').get()
        # Prefer the id passed by parse_comic; fall back to the trailing URL
        # segment when this callback is reached through some other request.
        comic_id = response.meta.get('comic_id') or self._id_from_url(response.url)
        for page_no, img in enumerate(imgs_src, start=1):
            item = ComicContentItem()
            item['comic_id'] = comic_id
            item['comic_con_src'] = img
            item['comic_con_page'] = page_no
            item['comic_chap_title'] = chap_title
            yield item

